
#include "stdafx.h"
#include "Skein.h"

namespace Skein
{
    // Four 64-bit words, matching the four m_state words that
    // Skein256::TransformBlock operates on.
    // NOTE(review): judging by the name, this is the precomputed initial state
    // for Skein-256 with a 224-bit digest - the use site is not in this part of
    // the file, confirm against the initialization code.
    const ulong SKEIN_256_IV_224[4] =
    {
        0xB80929699AE0F431ULL, 0xD340DC14A06929DCULL,
        0xAE866594BDE4DC5AULL, 0x339767C25A60EA1DULL
    };

    // Four 64-bit words, matching the four m_state words that
    // Skein256::TransformBlock operates on.
    // NOTE(review): judging by the name, this is the precomputed initial state
    // for Skein-256 with a 256-bit digest - the use site is not in this part of
    // the file, confirm against the initialization code.
    const ulong SKEIN_256_IV_256[4] =
    {
        0x388512680E660046ULL, 0x4B72D5DEC5A8FF01ULL,
        0x281A9298CA5EB3A5ULL, 0x54CA5249F46070C4ULL
    };

    // Eight 64-bit words, matching the eight m_state words that
    // Skein512::TransformBlock operates on.
    // NOTE(review): judging by the name, this is the precomputed initial state
    // for Skein-512 with a 384-bit digest - the use site is not in this part of
    // the file, confirm against the initialization code.
    const ulong SKEIN_512_IV_384[8] =
    {
        0xE5BF4D02BA62494CULL, 0x7AA1EABCC3E6FC68ULL,
        0xBBE5FC26E1038C5AULL, 0x53C9903E8F88E9FAULL,
        0xF30D8DDDFB940C83ULL, 0x500FDA3C4865ABECULL,
        0x2226C67F745BC5E7ULL, 0x015DA80077C639F7ULL
    };

    // Eight 64-bit words, matching the eight m_state words that
    // Skein512::TransformBlock operates on.
    // NOTE(review): judging by the name, this is the precomputed initial state
    // for Skein-512 with a 512-bit digest - the use site is not in this part of
    // the file, confirm against the initialization code.
    const ulong SKEIN_512_IV_512[8] =
    {
        0xA8D47980544A6E32ULL, 0x847511533E9B1A8AULL,
        0x6FAEE870D8E81A00ULL, 0x58B0D9D6CB557F92ULL,
        0x9BBC0051DAC1D4E9ULL, 0xB744E2B1D189E7CAULL,
        0x979350FA709C5EF3ULL, 0x0350125A92067BCDULL
    };

    // Mixes one input block into the four-word hash state.
    //
    // a_data  - buffer containing the input; BlockSize bytes are read from it
    // a_index - byte offset inside a_data at which the block begins
    //
    // Reads m_processed_bytes, m_flags and m_state[0..3]; on return m_state[0..3]
    // hold the permuted words XORed with the input block, and m_flags has bit 62
    // cleared. The input is copied into 64-bit words with memcpy, i.e. in host
    // byte order.
    // NOTE(review): the local block buffer is 32 bytes, so this relies on
    // BlockSize being 32 and on ulong being a 64-bit unsigned type - confirm
    // both in Skein.h.
    void Skein256::TransformBlock(byte* a_data, int a_index)
    {
        // Left-rotation amounts for the 8-round cycle that repeats through the
        // whole permutation. Row = round number modulo 8; column 0 is used by
        // the mix that accumulates into word 0, column 1 by the mix that
        // accumulates into word 2.
        static const int rotations[8][2] =
        {
            {  5, 56 },
            { 36, 28 },
            { 13, 46 },
            { 58, 44 },
            { 26, 20 },
            { 53, 35 },
            { 11, 42 },
            { 59, 50 }
        };

        const int total_rounds = 72;

        // Three tweak words: byte counter, flags, and their XOR.
        ulong tweak[3];
        tweak[0] = m_processed_bytes;
        tweak[1] = m_flags;
        tweak[2] = tweak[0] ^ tweak[1];

        // Five key words: the current state plus a parity word that is the XOR
        // of all four state words and a fixed constant.
        ulong key[5];
        key[4] = 0x5555555555555555;
        for (int i = 0; i < 4; ++i)
        {
            key[i] = m_state[i];
            key[4] ^= key[i];
        }

        ulong block[4];
        memcpy(block, a_data + a_index, BlockSize);

        // Initial subkey (number 0): key words 0..3, with tweak words 0 and 1
        // added to the two middle words.
        ulong v[4];
        v[0] = block[0] + key[0];
        v[1] = block[1] + key[1] + tweak[0];
        v[2] = block[2] + key[2] + tweak[1];
        v[3] = block[3] + key[3];

        for (int round = 0; round < total_rounds; ++round)
        {
            const int* rot = rotations[round % 8];

            // Even rounds pair the words as (0,1) and (2,3); odd rounds swap
            // the partners to (0,3) and (2,1).
            const int partner_of_0 = (round % 2 == 0) ? 1 : 3;
            const int partner_of_2 = 4 - partner_of_0;

            // Each mix: add the partner into the even word, rotate the partner
            // left, then XOR the updated even word into it.
            v[0] += v[partner_of_0];
            v[partner_of_0] = ((v[partner_of_0] << rot[0]) | (v[partner_of_0] >> (64 - rot[0]))) ^ v[0];

            v[2] += v[partner_of_2];
            v[partner_of_2] = ((v[partner_of_2] << rot[1]) | (v[partner_of_2] >> (64 - rot[1]))) ^ v[2];

            // After every fourth round add the next subkey (numbers 1..18):
            // the key words rotate by one position per subkey, the tweak words
            // likewise, and the subkey number itself goes into the last word.
            if (round % 4 == 3)
            {
                const int subkey = round / 4 + 1;

                v[0] += key[subkey % 5];
                v[1] += key[(subkey + 1) % 5] + tweak[subkey % 3];
                v[2] += key[(subkey + 2) % 5] + tweak[(subkey + 1) % 3];
                v[3] += key[(subkey + 3) % 5] + subkey;
            }
        }

        // Feed-forward: the new state is the permuted words XORed with the input.
        for (int i = 0; i < 4; ++i)
        {
            m_state[i] = v[i] ^ block[i];
        }

        // Clear bit 62 of the flags for the blocks that follow.
        // NOTE(review): presumably the "first block" marker - confirm where
        // m_flags is set up.
        m_flags = tweak[1] & ~0x4000000000000000;
    }

    void Skein512::TransformBlock(byte* a_data, int a_index)
    { 
        a_data += a_index;

        ulong  kw0, kw1, kw2, kw3, kw4, kw5, kw6, kw7, kw8, kw9, kw10, kw11;
        ulong  X0, X1, X2, X3, X4, X5, X6, X7;
        ulong  w[8];

        const int R_512_0_0 = 38;
        const int R_512_0_1 = 30;
        const int R_512_0_2 = 50;
        const int R_512_0_3 = 53;
        const int R_512_1_0 = 48;
        const int R_512_1_1 = 20;
        const int R_512_1_2 = 43;
        const int R_512_1_3 = 31;
        const int R_512_2_0 = 34;
        const int R_512_2_1 = 14;
        const int R_512_2_2 = 15;
        const int R_512_2_3 = 27;
        const int R_512_3_0 = 26;
        const int R_512_3_1 = 12;
        const int R_512_3_2 = 58;
        const int R_512_3_3 = 7;
        const int R_512_4_0 = 33;
        const int R_512_4_1 = 49;
        const int R_512_4_2 = 8;
        const int R_512_4_3 = 42;
        const int R_512_5_0 = 39;
        const int R_512_5_1 = 27;
        const int R_512_5_2 = 41;
        const int R_512_5_3 = 14;
        const int R_512_6_0 = 29;
        const int R_512_6_1 = 26;
        const int R_512_6_2 = 11;
        const int R_512_6_3 = 9;
        const int R_512_7_0 = 33;
        const int R_512_7_1 = 51;
        const int R_512_7_2 = 39;
        const int R_512_7_3 = 35;

        kw0 = m_processed_bytes;
        kw1 = m_flags;
        kw2 = kw0 ^ kw1;
        kw3 = m_state[0];
        kw4 = m_state[1];
        kw5 = m_state[2];
        kw6 = m_state[3];
        kw7 = m_state[4];
        kw8 = m_state[5];
        kw9 = m_state[6];
        kw10 = m_state[7];
        kw11 = kw3 ^ kw4 ^ kw5 ^ kw6 ^ kw7 ^ kw8 ^ kw9 ^ kw10 ^ 0x5555555555555555;

        memcpy(w, a_data, BlockSize);

        X0 = w[0] + kw3;
        X1 = w[1] + kw4;
        X2 = w[2] + kw5;
        X3 = w[3] + kw6;
        X4 = w[4] + kw7;
        X5 = w[5] + kw8 + kw0;
        X6 = w[6] + kw9 + kw1;
        X7 = w[7] + kw10;

        X0 += X1;
        X1 = (X1 << R_512_0_0) | (X1 >> (64 - R_512_0_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_0_1) | (X3 >> (64 - R_512_0_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_0_2) | (X5 >> (64 - R_512_0_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_0_3) | (X7 >> (64 - R_512_0_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_1_0) | (X1 >> (64 - R_512_1_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_1_1) | (X7 >> (64 - R_512_1_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_1_2) | (X5 >> (64 - R_512_1_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_1_3) | (X3 >> (64 - R_512_1_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_2_0) | (X1 >> (64 - R_512_2_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_2_1) | (X3 >> (64 - R_512_2_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_2_2) | (X5 >> (64 - R_512_2_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_2_3) | (X7 >> (64 - R_512_2_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_3_0) | (X1 >> (64 - R_512_3_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_3_1) | (X7 >> (64 - R_512_3_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_3_2) | (X5 >> (64 - R_512_3_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_3_3) | (X3 >> (64 - R_512_3_3));
        X3 ^= X4;
        X0 += kw4;
        X1 += kw5;
        X2 += kw6;
        X3 += kw7;
        X4 += kw8;
        X5 += kw9 + kw1;
        X6 += kw10 + kw2;
        X7 += kw11 + 1;
        X0 += X1;
        X1 = (X1 << R_512_4_0) | (X1 >> (64 - R_512_4_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_4_1) | (X3 >> (64 - R_512_4_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_4_2) | (X5 >> (64 - R_512_4_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_4_3) | (X7 >> (64 - R_512_4_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_5_0) | (X1 >> (64 - R_512_5_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_5_1) | (X7 >> (64 - R_512_5_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_5_2) | (X5 >> (64 - R_512_5_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_5_3) | (X3 >> (64 - R_512_5_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_6_0) | (X1 >> (64 - R_512_6_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_6_1) | (X3 >> (64 - R_512_6_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_6_2) | (X5 >> (64 - R_512_6_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_6_3) | (X7 >> (64 - R_512_6_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_7_0) | (X1 >> (64 - R_512_7_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_7_1) | (X7 >> (64 - R_512_7_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_7_2) | (X5 >> (64 - R_512_7_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_7_3) | (X3 >> (64 - R_512_7_3));
        X3 ^= X4;
        X0 += kw5;
        X1 += kw6;
        X2 += kw7;
        X3 += kw8;
        X4 += kw9;
        X5 += kw10 + kw2;
        X6 += kw11 + kw0;
        X7 += kw3 + 2;


        X0 += X1;
        X1 = (X1 << R_512_0_0) | (X1 >> (64 - R_512_0_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_0_1) | (X3 >> (64 - R_512_0_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_0_2) | (X5 >> (64 - R_512_0_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_0_3) | (X7 >> (64 - R_512_0_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_1_0) | (X1 >> (64 - R_512_1_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_1_1) | (X7 >> (64 - R_512_1_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_1_2) | (X5 >> (64 - R_512_1_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_1_3) | (X3 >> (64 - R_512_1_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_2_0) | (X1 >> (64 - R_512_2_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_2_1) | (X3 >> (64 - R_512_2_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_2_2) | (X5 >> (64 - R_512_2_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_2_3) | (X7 >> (64 - R_512_2_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_3_0) | (X1 >> (64 - R_512_3_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_3_1) | (X7 >> (64 - R_512_3_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_3_2) | (X5 >> (64 - R_512_3_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_3_3) | (X3 >> (64 - R_512_3_3));
        X3 ^= X4;
        X0 += kw6;
        X1 += kw7;
        X2 += kw8;
        X3 += kw9;
        X4 += kw10;
        X5 += kw11 + kw0;
        X6 += kw3 + kw1;
        X7 += kw4 + 3;
        X0 += X1;
        X1 = (X1 << R_512_4_0) | (X1 >> (64 - R_512_4_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_4_1) | (X3 >> (64 - R_512_4_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_4_2) | (X5 >> (64 - R_512_4_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_4_3) | (X7 >> (64 - R_512_4_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_5_0) | (X1 >> (64 - R_512_5_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_5_1) | (X7 >> (64 - R_512_5_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_5_2) | (X5 >> (64 - R_512_5_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_5_3) | (X3 >> (64 - R_512_5_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_6_0) | (X1 >> (64 - R_512_6_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_6_1) | (X3 >> (64 - R_512_6_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_6_2) | (X5 >> (64 - R_512_6_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_6_3) | (X7 >> (64 - R_512_6_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_7_0) | (X1 >> (64 - R_512_7_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_7_1) | (X7 >> (64 - R_512_7_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_7_2) | (X5 >> (64 - R_512_7_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_7_3) | (X3 >> (64 - R_512_7_3));
        X3 ^= X4;
        X0 += kw7;
        X1 += kw8;
        X2 += kw9;
        X3 += kw10;
        X4 += kw11;
        X5 += kw3 + kw1;
        X6 += kw4 + kw2;
        X7 += kw5 + 4;


        X0 += X1;
        X1 = (X1 << R_512_0_0) | (X1 >> (64 - R_512_0_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_0_1) | (X3 >> (64 - R_512_0_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_0_2) | (X5 >> (64 - R_512_0_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_0_3) | (X7 >> (64 - R_512_0_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_1_0) | (X1 >> (64 - R_512_1_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_1_1) | (X7 >> (64 - R_512_1_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_1_2) | (X5 >> (64 - R_512_1_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_1_3) | (X3 >> (64 - R_512_1_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_2_0) | (X1 >> (64 - R_512_2_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_2_1) | (X3 >> (64 - R_512_2_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_2_2) | (X5 >> (64 - R_512_2_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_2_3) | (X7 >> (64 - R_512_2_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_3_0) | (X1 >> (64 - R_512_3_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_3_1) | (X7 >> (64 - R_512_3_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_3_2) | (X5 >> (64 - R_512_3_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_3_3) | (X3 >> (64 - R_512_3_3));
        X3 ^= X4;
        X0 += kw8;
        X1 += kw9;
        X2 += kw10;
        X3 += kw11;
        X4 += kw3;
        X5 += kw4 + kw2;
        X6 += kw5 + kw0;
        X7 += kw6 + 5;
        X0 += X1;
        X1 = (X1 << R_512_4_0) | (X1 >> (64 - R_512_4_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_4_1) | (X3 >> (64 - R_512_4_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_4_2) | (X5 >> (64 - R_512_4_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_4_3) | (X7 >> (64 - R_512_4_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_5_0) | (X1 >> (64 - R_512_5_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_5_1) | (X7 >> (64 - R_512_5_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_5_2) | (X5 >> (64 - R_512_5_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_5_3) | (X3 >> (64 - R_512_5_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_6_0) | (X1 >> (64 - R_512_6_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_6_1) | (X3 >> (64 - R_512_6_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_6_2) | (X5 >> (64 - R_512_6_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_6_3) | (X7 >> (64 - R_512_6_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_7_0) | (X1 >> (64 - R_512_7_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_7_1) | (X7 >> (64 - R_512_7_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_7_2) | (X5 >> (64 - R_512_7_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_7_3) | (X3 >> (64 - R_512_7_3));
        X3 ^= X4;
        X0 += kw9;
        X1 += kw10;
        X2 += kw11;
        X3 += kw3;
        X4 += kw4;
        X5 += kw5 + kw0;
        X6 += kw6 + kw1;
        X7 += kw7 + 6;


        X0 += X1;
        X1 = (X1 << R_512_0_0) | (X1 >> (64 - R_512_0_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_0_1) | (X3 >> (64 - R_512_0_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_0_2) | (X5 >> (64 - R_512_0_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_0_3) | (X7 >> (64 - R_512_0_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_1_0) | (X1 >> (64 - R_512_1_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_1_1) | (X7 >> (64 - R_512_1_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_1_2) | (X5 >> (64 - R_512_1_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_1_3) | (X3 >> (64 - R_512_1_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_2_0) | (X1 >> (64 - R_512_2_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_2_1) | (X3 >> (64 - R_512_2_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_2_2) | (X5 >> (64 - R_512_2_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_2_3) | (X7 >> (64 - R_512_2_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_3_0) | (X1 >> (64 - R_512_3_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_3_1) | (X7 >> (64 - R_512_3_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_3_2) | (X5 >> (64 - R_512_3_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_3_3) | (X3 >> (64 - R_512_3_3));
        X3 ^= X4;
        X0 += kw10;
        X1 += kw11;
        X2 += kw3;
        X3 += kw4;
        X4 += kw5;
        X5 += kw6 + kw1;
        X6 += kw7 + kw2;
        X7 += kw8 + 7;
        X0 += X1;
        X1 = (X1 << R_512_4_0) | (X1 >> (64 - R_512_4_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_4_1) | (X3 >> (64 - R_512_4_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_4_2) | (X5 >> (64 - R_512_4_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_4_3) | (X7 >> (64 - R_512_4_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_5_0) | (X1 >> (64 - R_512_5_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_5_1) | (X7 >> (64 - R_512_5_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_5_2) | (X5 >> (64 - R_512_5_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_5_3) | (X3 >> (64 - R_512_5_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_6_0) | (X1 >> (64 - R_512_6_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_6_1) | (X3 >> (64 - R_512_6_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_6_2) | (X5 >> (64 - R_512_6_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_6_3) | (X7 >> (64 - R_512_6_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_7_0) | (X1 >> (64 - R_512_7_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_7_1) | (X7 >> (64 - R_512_7_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_7_2) | (X5 >> (64 - R_512_7_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_7_3) | (X3 >> (64 - R_512_7_3));
        X3 ^= X4;
        X0 += kw11;
        X1 += kw3;
        X2 += kw4;
        X3 += kw5;
        X4 += kw6;
        X5 += kw7 + kw2;
        X6 += kw8 + kw0;
        X7 += kw9 + 8;


        X0 += X1;
        X1 = (X1 << R_512_0_0) | (X1 >> (64 - R_512_0_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_0_1) | (X3 >> (64 - R_512_0_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_0_2) | (X5 >> (64 - R_512_0_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_0_3) | (X7 >> (64 - R_512_0_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_1_0) | (X1 >> (64 - R_512_1_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_1_1) | (X7 >> (64 - R_512_1_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_1_2) | (X5 >> (64 - R_512_1_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_1_3) | (X3 >> (64 - R_512_1_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_2_0) | (X1 >> (64 - R_512_2_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_2_1) | (X3 >> (64 - R_512_2_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_2_2) | (X5 >> (64 - R_512_2_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_2_3) | (X7 >> (64 - R_512_2_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_3_0) | (X1 >> (64 - R_512_3_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_3_1) | (X7 >> (64 - R_512_3_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_3_2) | (X5 >> (64 - R_512_3_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_3_3) | (X3 >> (64 - R_512_3_3));
        X3 ^= X4;
        X0 += kw3;
        X1 += kw4;
        X2 += kw5;
        X3 += kw6;
        X4 += kw7;
        X5 += kw8 + kw0;
        X6 += kw9 + kw1;
        X7 += kw10 + 9;
        X0 += X1;
        X1 = (X1 << R_512_4_0) | (X1 >> (64 - R_512_4_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_4_1) | (X3 >> (64 - R_512_4_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_4_2) | (X5 >> (64 - R_512_4_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_4_3) | (X7 >> (64 - R_512_4_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_5_0) | (X1 >> (64 - R_512_5_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_5_1) | (X7 >> (64 - R_512_5_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_5_2) | (X5 >> (64 - R_512_5_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_5_3) | (X3 >> (64 - R_512_5_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_6_0) | (X1 >> (64 - R_512_6_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_6_1) | (X3 >> (64 - R_512_6_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_6_2) | (X5 >> (64 - R_512_6_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_6_3) | (X7 >> (64 - R_512_6_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_7_0) | (X1 >> (64 - R_512_7_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_7_1) | (X7 >> (64 - R_512_7_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_7_2) | (X5 >> (64 - R_512_7_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_7_3) | (X3 >> (64 - R_512_7_3));
        X3 ^= X4;
        X0 += kw4;
        X1 += kw5;
        X2 += kw6;
        X3 += kw7;
        X4 += kw8;
        X5 += kw9 + kw1;
        X6 += kw10 + kw2;
        X7 += kw11 + 10;


        X0 += X1;
        X1 = (X1 << R_512_0_0) | (X1 >> (64 - R_512_0_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_0_1) | (X3 >> (64 - R_512_0_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_0_2) | (X5 >> (64 - R_512_0_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_0_3) | (X7 >> (64 - R_512_0_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_1_0) | (X1 >> (64 - R_512_1_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_1_1) | (X7 >> (64 - R_512_1_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_1_2) | (X5 >> (64 - R_512_1_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_1_3) | (X3 >> (64 - R_512_1_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_2_0) | (X1 >> (64 - R_512_2_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_2_1) | (X3 >> (64 - R_512_2_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_2_2) | (X5 >> (64 - R_512_2_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_2_3) | (X7 >> (64 - R_512_2_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_3_0) | (X1 >> (64 - R_512_3_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_3_1) | (X7 >> (64 - R_512_3_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_3_2) | (X5 >> (64 - R_512_3_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_3_3) | (X3 >> (64 - R_512_3_3));
        X3 ^= X4;
        X0 += kw5;
        X1 += kw6;
        X2 += kw7;
        X3 += kw8;
        X4 += kw9;
        X5 += kw10 + kw2;
        X6 += kw11 + kw0;
        X7 += kw3 + 11;
        X0 += X1;
        X1 = (X1 << R_512_4_0) | (X1 >> (64 - R_512_4_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_4_1) | (X3 >> (64 - R_512_4_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_4_2) | (X5 >> (64 - R_512_4_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_4_3) | (X7 >> (64 - R_512_4_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_5_0) | (X1 >> (64 - R_512_5_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_5_1) | (X7 >> (64 - R_512_5_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_5_2) | (X5 >> (64 - R_512_5_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_5_3) | (X3 >> (64 - R_512_5_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_6_0) | (X1 >> (64 - R_512_6_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_6_1) | (X3 >> (64 - R_512_6_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_6_2) | (X5 >> (64 - R_512_6_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_6_3) | (X7 >> (64 - R_512_6_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_7_0) | (X1 >> (64 - R_512_7_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_7_1) | (X7 >> (64 - R_512_7_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_7_2) | (X5 >> (64 - R_512_7_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_7_3) | (X3 >> (64 - R_512_7_3));
        X3 ^= X4;
        X0 += kw6;
        X1 += kw7;
        X2 += kw8;
        X3 += kw9;
        X4 += kw10;
        X5 += kw11 + kw0;
        X6 += kw3 + kw1;
        X7 += kw4 + 12;


        X0 += X1;
        X1 = (X1 << R_512_0_0) | (X1 >> (64 - R_512_0_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_0_1) | (X3 >> (64 - R_512_0_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_0_2) | (X5 >> (64 - R_512_0_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_0_3) | (X7 >> (64 - R_512_0_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_1_0) | (X1 >> (64 - R_512_1_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_1_1) | (X7 >> (64 - R_512_1_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_1_2) | (X5 >> (64 - R_512_1_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_1_3) | (X3 >> (64 - R_512_1_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_2_0) | (X1 >> (64 - R_512_2_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_2_1) | (X3 >> (64 - R_512_2_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_2_2) | (X5 >> (64 - R_512_2_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_2_3) | (X7 >> (64 - R_512_2_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_3_0) | (X1 >> (64 - R_512_3_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_3_1) | (X7 >> (64 - R_512_3_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_3_2) | (X5 >> (64 - R_512_3_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_3_3) | (X3 >> (64 - R_512_3_3));
        X3 ^= X4;
        X0 += kw7;
        X1 += kw8;
        X2 += kw9;
        X3 += kw10;
        X4 += kw11;
        X5 += kw3 + kw1;
        X6 += kw4 + kw2;
        X7 += kw5 + 13;
        X0 += X1;
        X1 = (X1 << R_512_4_0) | (X1 >> (64 - R_512_4_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_4_1) | (X3 >> (64 - R_512_4_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_4_2) | (X5 >> (64 - R_512_4_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_4_3) | (X7 >> (64 - R_512_4_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_5_0) | (X1 >> (64 - R_512_5_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_5_1) | (X7 >> (64 - R_512_5_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_5_2) | (X5 >> (64 - R_512_5_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_5_3) | (X3 >> (64 - R_512_5_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_6_0) | (X1 >> (64 - R_512_6_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_6_1) | (X3 >> (64 - R_512_6_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_6_2) | (X5 >> (64 - R_512_6_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_6_3) | (X7 >> (64 - R_512_6_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_7_0) | (X1 >> (64 - R_512_7_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_7_1) | (X7 >> (64 - R_512_7_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_7_2) | (X5 >> (64 - R_512_7_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_7_3) | (X3 >> (64 - R_512_7_3));
        X3 ^= X4;
        X0 += kw8;
        X1 += kw9;
        X2 += kw10;
        X3 += kw11;
        X4 += kw3;
        X5 += kw4 + kw2;
        X6 += kw5 + kw0;
        X7 += kw6 + 14;


        X0 += X1;
        X1 = (X1 << R_512_0_0) | (X1 >> (64 - R_512_0_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_0_1) | (X3 >> (64 - R_512_0_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_0_2) | (X5 >> (64 - R_512_0_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_0_3) | (X7 >> (64 - R_512_0_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_1_0) | (X1 >> (64 - R_512_1_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_1_1) | (X7 >> (64 - R_512_1_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_1_2) | (X5 >> (64 - R_512_1_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_1_3) | (X3 >> (64 - R_512_1_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_2_0) | (X1 >> (64 - R_512_2_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_2_1) | (X3 >> (64 - R_512_2_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_2_2) | (X5 >> (64 - R_512_2_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_2_3) | (X7 >> (64 - R_512_2_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_3_0) | (X1 >> (64 - R_512_3_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_3_1) | (X7 >> (64 - R_512_3_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_3_2) | (X5 >> (64 - R_512_3_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_3_3) | (X3 >> (64 - R_512_3_3));
        X3 ^= X4;
        X0 += kw9;
        X1 += kw10;
        X2 += kw11;
        X3 += kw3;
        X4 += kw4;
        X5 += kw5 + kw0;
        X6 += kw6 + kw1;
        X7 += kw7 + 15;
        X0 += X1;
        X1 = (X1 << R_512_4_0) | (X1 >> (64 - R_512_4_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_4_1) | (X3 >> (64 - R_512_4_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_4_2) | (X5 >> (64 - R_512_4_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_4_3) | (X7 >> (64 - R_512_4_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_5_0) | (X1 >> (64 - R_512_5_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_5_1) | (X7 >> (64 - R_512_5_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_5_2) | (X5 >> (64 - R_512_5_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_5_3) | (X3 >> (64 - R_512_5_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_6_0) | (X1 >> (64 - R_512_6_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_6_1) | (X3 >> (64 - R_512_6_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_6_2) | (X5 >> (64 - R_512_6_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_6_3) | (X7 >> (64 - R_512_6_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_7_0) | (X1 >> (64 - R_512_7_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_7_1) | (X7 >> (64 - R_512_7_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_7_2) | (X5 >> (64 - R_512_7_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_7_3) | (X3 >> (64 - R_512_7_3));
        X3 ^= X4;
        X0 += kw10;
        X1 += kw11;
        X2 += kw3;
        X3 += kw4;
        X4 += kw5;
        X5 += kw6 + kw1;
        X6 += kw7 + kw2;
        X7 += kw8 + 16;


        X0 += X1;
        X1 = (X1 << R_512_0_0) | (X1 >> (64 - R_512_0_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_0_1) | (X3 >> (64 - R_512_0_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_0_2) | (X5 >> (64 - R_512_0_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_0_3) | (X7 >> (64 - R_512_0_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_1_0) | (X1 >> (64 - R_512_1_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_1_1) | (X7 >> (64 - R_512_1_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_1_2) | (X5 >> (64 - R_512_1_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_1_3) | (X3 >> (64 - R_512_1_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_2_0) | (X1 >> (64 - R_512_2_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_2_1) | (X3 >> (64 - R_512_2_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_2_2) | (X5 >> (64 - R_512_2_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_2_3) | (X7 >> (64 - R_512_2_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_3_0) | (X1 >> (64 - R_512_3_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_3_1) | (X7 >> (64 - R_512_3_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_3_2) | (X5 >> (64 - R_512_3_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_3_3) | (X3 >> (64 - R_512_3_3));
        X3 ^= X4;
        X0 += kw11;
        X1 += kw3;
        X2 += kw4;
        X3 += kw5;
        X4 += kw6;
        X5 += kw7 + kw2;
        X6 += kw8 + kw0;
        X7 += kw9 + 17;
        X0 += X1;
        X1 = (X1 << R_512_4_0) | (X1 >> (64 - R_512_4_0));
        X1 ^= X0;
        X2 += X3;
        X3 = (X3 << R_512_4_1) | (X3 >> (64 - R_512_4_1));
        X3 ^= X2;
        X4 += X5;
        X5 = (X5 << R_512_4_2) | (X5 >> (64 - R_512_4_2));
        X5 ^= X4;
        X6 += X7;
        X7 = (X7 << R_512_4_3) | (X7 >> (64 - R_512_4_3));
        X7 ^= X6;
        X2 += X1;
        X1 = (X1 << R_512_5_0) | (X1 >> (64 - R_512_5_0));
        X1 ^= X2;
        X4 += X7;
        X7 = (X7 << R_512_5_1) | (X7 >> (64 - R_512_5_1));
        X7 ^= X4;
        X6 += X5;
        X5 = (X5 << R_512_5_2) | (X5 >> (64 - R_512_5_2));
        X5 ^= X6;
        X0 += X3;
        X3 = (X3 << R_512_5_3) | (X3 >> (64 - R_512_5_3));
        X3 ^= X0;
        X4 += X1;
        X1 = (X1 << R_512_6_0) | (X1 >> (64 - R_512_6_0));
        X1 ^= X4;
        X6 += X3;
        X3 = (X3 << R_512_6_1) | (X3 >> (64 - R_512_6_1));
        X3 ^= X6;
        X0 += X5;
        X5 = (X5 << R_512_6_2) | (X5 >> (64 - R_512_6_2));
        X5 ^= X0;
        X2 += X7;
        X7 = (X7 << R_512_6_3) | (X7 >> (64 - R_512_6_3));
        X7 ^= X2;
        X6 += X1;
        X1 = (X1 << R_512_7_0) | (X1 >> (64 - R_512_7_0));
        X1 ^= X6;
        X0 += X7;
        X7 = (X7 << R_512_7_1) | (X7 >> (64 - R_512_7_1));
        X7 ^= X0;
        X2 += X5;
        X5 = (X5 << R_512_7_2) | (X5 >> (64 - R_512_7_2));
        X5 ^= X2;
        X4 += X3;
        X3 = (X3 << R_512_7_3) | (X3 >> (64 - R_512_7_3));
        X3 ^= X4;
        X0 += kw3;
        X1 += kw4;
        X2 += kw5;
        X3 += kw6;
        X4 += kw7;
        X5 += kw8 + kw0;
        X6 += kw9 + kw1;
        X7 += kw10 + 18;

        m_state[0] = X0 ^ w[0];
        m_state[1] = X1 ^ w[1];
        m_state[2] = X2 ^ w[2];
        m_state[3] = X3 ^ w[3];
        m_state[4] = X4 ^ w[4];
        m_state[5] = X5 ^ w[5];
        m_state[6] = X6 ^ w[6];
        m_state[7] = X7 ^ w[7];

        m_flags = kw1 & ~0x4000000000000000;
    }

    // Absorbs a_length bytes of a_data, beginning at offset a_index.
    //
    // Both size comparisons below are strict, so a block is only compressed
    // once more input is known to follow it; up to one whole block can remain
    // in m_buffer when this function returns (Finish() compresses it later).
    void Skein::TransformBytes(byte* a_data, int a_index, int a_length)
    {
        // Buffered bytes plus the new input exceed one block: top the buffer
        // up to a full block and compress it.
        if (m_buffer_pos + a_length > BlockSize)
        {
            const int fill = BlockSize - m_buffer_pos;

            memcpy(m_buffer + m_buffer_pos, a_data + a_index, fill);
            m_buffer_pos += fill;
            a_index += fill;
            a_length -= fill;

            // The byte counter is advanced before the block is compressed.
            m_processed_bytes += BlockSize;
            TransformBlock(m_buffer, 0);
            m_buffer_pos = 0;
        }

        // Compress whole blocks straight from the caller's memory while more
        // than one block of input is still left.
        for (; a_length > BlockSize; a_length -= BlockSize, a_index += BlockSize)
        {
            m_processed_bytes += BlockSize;
            TransformBlock(a_data, a_index);
        }

        if (a_length == 0)
            return;

        // Stash the remainder behind whatever is already buffered.
        memcpy(m_buffer + m_buffer_pos, a_data + a_index, a_length);
        m_buffer_pos += a_length;
    }

    void Skein::Finish()
    {
        m_flags |= 0x8000000000000000; 

        if (m_buffer_pos < BlockSize) 
            memset(&m_buffer[m_buffer_pos], 0, BlockSize - m_buffer_pos);

        m_processed_bytes += m_buffer_pos;
        TransformBlock(m_buffer, 0); 

        memset(m_buffer, 0, BlockSize);  
        m_processed_bytes = 8;
        m_flags = 0xFF00000000000000;
        m_buffer_pos = 0;

        TransformBlock(m_buffer, 0); 
    }  

    // Finalizes the hash, returns the digest buffer produced by GetResult(),
    // and resets the object via Initialize() so it can be used again.
    // The digest is captured before the reset, since Initialize() overwrites
    // m_state.
    byte* Skein::TransformFinal()
    {
        Finish();

        byte* const digest = GetResult();

        Initialize();
        return digest;
    }

    // Returns a freshly allocated copy of the first HashSize bytes of m_state.
    // The bytes are copied exactly as they sit in memory. The buffer comes
    // from new[] and is not released anywhere in this function, so the caller
    // is responsible for delete[]-ing it.
    byte* Skein::GetResult()
    {
        byte* const digest = new byte[HashSize];

        // memcpy returns its destination pointer.
        return static_cast<byte*>(memcpy(digest, m_state, HashSize));
    }
  
    // Resets the hasher to its starting state: loads the initial chaining
    // value that matches HashSize (in bytes), clears the byte counter and the
    // buffer position, and restores the starting flag word.
    //
    // Each copy is sized by the IV table being read. The tables differ in
    // length (4 words for the 256-bit variants, 8 words for the 512-bit ones),
    // so sizing the copy by sizeof(m_state) would read past the end of the
    // shorter tables.
    // NOTE(review): m_state is declared outside this file; this assumes it has
    // room for at least as many words as the selected table - confirm.
    void Skein::Initialize()
    {
        switch (HashSize)
        {
            case  28: memcpy(m_state, SKEIN_256_IV_224, sizeof(SKEIN_256_IV_224));  break;
            case  32: memcpy(m_state, SKEIN_256_IV_256, sizeof(SKEIN_256_IV_256));  break;
            case  48: memcpy(m_state, SKEIN_512_IV_384, sizeof(SKEIN_512_IV_384));  break;
            case  64: memcpy(m_state, SKEIN_512_IV_512, sizeof(SKEIN_512_IV_512));  break;
            // Any other HashSize leaves m_state untouched.
        }

        m_processed_bytes = 0;
        // Starting flag word; TransformBlock clears bit 62 of it after each block.
        m_flags = 0x7000000000000000;
        m_buffer_pos = 0;
    }

    // One-shot helper: resets the hasher, absorbs a_legth bytes starting at
    // a_data[0], and returns the digest buffer from TransformFinal().
    // Any partially hashed input held by the object is discarded by the
    // initial reset.
    byte* Skein::ComputeBytes(byte* a_data, int a_legth)
    {
        Initialize();
        TransformBytes(a_data, 0, a_legth);

        byte* const digest = TransformFinal();
        return digest;
    }
}